import re


def clean_des(string):
    """
    Strip markup tags such as ``<div>`` from ``string``.

    The tags are removed because they would otherwise be picked up when the
    global index is built.

    A tag is a ``<`` followed by any characters (newlines included) up to the
    next ``>``. A ``<`` with no later ``>``, or a ``>`` with no earlier ``<``,
    is ordinary text and is kept unchanged.

    :param string: text that may contain markup tags
    :return: the text with every complete ``<...>`` tag removed
    """
    # A single regex pass replaces the repeated find/slice loop, which never
    # terminated when a "<" had no matching ">" after it (the slice start
    # wrapped around to 0 and the string grew on every iteration).
    return re.sub(r"<[^>]*>", "", string)


def remove_punctuation(line):
    """
    Drop every character that is not an ASCII letter, an ASCII digit, a
    character in the range U+4E00..U+9FA5, or a space.

    :param line: text to filter
    :return: ``line`` with all other characters removed
    """
    disallowed = r"[^a-zA-Z0-9\u4e00-\u9fa5 ]"
    return re.sub(disallowed, "", line)


if __name__ == "__main__":
    # Quick manual check: only the trailing "q" survives the filter.
    print(remove_punctuation("~^&%$#q"))
